{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Untitled11.ipynb",
      "version": "0.3.2",
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "twOJzFIIuF0w",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# This program predicts stock prices by using machine learning models\n",
        "\n",
        "#Install the dependencies\n",
        "import quandl\n",
        "import numpy as np \n",
        "from sklearn.linear_model import LinearRegression\n",
        "from sklearn.svm import SVR\n",
        "from sklearn.model_selection import train_test_split"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "8l3KwJoovJLC",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 175
        },
        "outputId": "0b2c989f-3db8-48bc-ede8-03f1bf8e8ec1"
      },
      "source": [
        "#Get the stock data\n",
        "df = quandl.get(\"WIKI/AMZN\")\n",
        "# Take a look at the data\n",
        "print(df.head())"
      ],
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "             Open   High    Low  ...  Adj. Low  Adj. Close  Adj. Volume\n",
            "Date                             ...                                   \n",
            "1997-05-16  22.38  23.75  20.50  ...  1.708333    1.729167   14700000.0\n",
            "1997-05-19  20.50  21.25  19.50  ...  1.625000    1.708333    6106800.0\n",
            "1997-05-20  20.75  21.00  19.63  ...  1.635833    1.635833    5467200.0\n",
            "1997-05-21  19.25  19.75  16.50  ...  1.375000    1.427500   18853200.0\n",
            "1997-05-22  17.25  17.38  15.75  ...  1.312500    1.395833   11776800.0\n",
            "\n",
            "[5 rows x 12 columns]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "lT9a4KJxvi67",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 140
        },
        "outputId": "52b7a080-adf7-4743-8ced-23688173dd2f"
      },
      "source": [
        "# Get the Adjusted Close Price\n",
        "df = df[['Adj. Close']]\n",
        "#Take a look at the new data\n",
        "print(df.head())"
      ],
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "            Adj. Close\n",
            "Date                  \n",
            "1997-05-16    1.729167\n",
            "1997-05-19    1.708333\n",
            "1997-05-20    1.635833\n",
            "1997-05-21    1.427500\n",
            "1997-05-22    1.395833\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jI5ZFdugvz0Z",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 140
        },
        "outputId": "02e07ebf-8e83-404c-9282-0aa2b15f3130"
      },
      "source": [
        "# A variable for predicting 'n' days out into the future\n",
        "forecast_out = 30 #'n=30' days\n",
        "#Create another column (the target or dependent variable) shifted 'n' units up\n",
        "df['Prediction'] = df[['Adj. Close']].shift(-forecast_out)\n",
        "#print the new data set\n",
        "print(df.tail())\n"
      ],
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "            Adj. Close  Prediction\n",
            "Date                              \n",
            "2018-03-21     1581.86         NaN\n",
            "2018-03-22     1544.10         NaN\n",
            "2018-03-23     1495.56         NaN\n",
            "2018-03-26     1555.86         NaN\n",
            "2018-03-27     1497.05         NaN\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "FyJ8Juj0xfQG",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 140
        },
        "outputId": "303ae60f-cd59-4356-9245-76a24da345ce"
      },
      "source": [
        "### Create the independent data set (X)  #######\n",
        "# Convert the dataframe to a numpy array\n",
        "X = np.array(df.drop(['Prediction'],1))\n",
        "\n",
        "#Remove the last 'n' rows\n",
        "X = X[:-forecast_out]\n",
        "print(X)"
      ],
      "execution_count": 18,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[[   1.72916667]\n",
            " [   1.70833333]\n",
            " [   1.63583333]\n",
            " ...\n",
            " [1350.47      ]\n",
            " [1338.99      ]\n",
            " [1386.23      ]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XSFFcCUCyJGl",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 52
        },
        "outputId": "d3bf26d7-67c3-4cc1-e854-60e7636364b0"
      },
      "source": [
        "### Create the dependent data set (y)  #####\n",
        "# Convert the dataframe to a numpy array (All of the values including the NaN's)\n",
        "y = np.array(df['Prediction'])\n",
        "# Get all of the y values except the last 'n' rows\n",
        "y = y[:-forecast_out]\n",
        "print(y)"
      ],
      "execution_count": 19,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[1.54166667e+00 1.51583333e+00 1.58833333e+00 ... 1.49556000e+03\n",
            " 1.55586000e+03 1.49705000e+03]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "uTs969WyzAVv",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Split the data into 80% training and 20% testing\n",
        "x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "mOU8vGVyzdtk",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 52
        },
        "outputId": "bc0e6671-92f9-4793-d6cd-aef197ac5962"
      },
      "source": [
        "# Create and train the Support Vector Machine (Regressor)\n",
        "svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)\n",
        "svr_rbf.fit(x_train, y_train)"
      ],
      "execution_count": 21,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,\n",
              "    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 21
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dE99HjCa0Ft9",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "d7a24fc9-a364-4d5e-848f-be7856f2dad5"
      },
      "source": [
        "# Testing Model: Score returns the coefficient of determination R^2 of the prediction. \n",
        "# The best possible score is 1.0\n",
        "svm_confidence = svr_rbf.score(x_test, y_test)\n",
        "print(\"svm confidence: \", svm_confidence)"
      ],
      "execution_count": 22,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "svm confidence:  0.9274190417518909\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ThPLji_30jUh",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "6979bf00-01cc-4a99-936a-e29cf7d14bcb"
      },
      "source": [
        "# Create and train the Linear Regression  Model\n",
        "lr = LinearRegression()\n",
        "# Train the model\n",
        "lr.fit(x_train, y_train)"
      ],
      "execution_count": 23,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 23
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jCPA1KZj0z-A",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "c22f33b3-a840-4666-8861-3b4d3f049776"
      },
      "source": [
        "# Testing Model: Score returns the coefficient of determination R^2 of the prediction. \n",
        "# The best possible score is 1.0\n",
        "lr_confidence = lr.score(x_test, y_test)\n",
        "print(\"lr confidence: \", lr_confidence)"
      ],
      "execution_count": 24,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "lr confidence:  0.9874918531515935\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zJ2PwS_k1EmG",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 543
        },
        "outputId": "fddf4529-790f-44cc-fef2-bb17df3933c5"
      },
      "source": [
        "# Set x_forecast equal to the last 30 rows of the original data set from Adj. Close column\n",
        "x_forecast = np.array(df.drop(['Prediction'],1))[-forecast_out:]\n",
        "print(x_forecast)"
      ],
      "execution_count": 25,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[[1414.51]\n",
            " [1451.05]\n",
            " [1461.76]\n",
            " [1448.69]\n",
            " [1468.35]\n",
            " [1482.92]\n",
            " [1484.76]\n",
            " [1500.  ]\n",
            " [1521.95]\n",
            " [1511.98]\n",
            " [1512.45]\n",
            " [1493.45]\n",
            " [1500.25]\n",
            " [1523.61]\n",
            " [1537.64]\n",
            " [1545.  ]\n",
            " [1551.86]\n",
            " [1578.89]\n",
            " [1598.39]\n",
            " [1588.18]\n",
            " [1591.  ]\n",
            " [1582.32]\n",
            " [1571.68]\n",
            " [1544.93]\n",
            " [1586.51]\n",
            " [1581.86]\n",
            " [1544.1 ]\n",
            " [1495.56]\n",
            " [1555.86]\n",
            " [1497.05]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "6FAGkNOr1l64",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 228
        },
        "outputId": "8449bdff-96ae-4884-d7df-e753acb96915"
      },
      "source": [
        "# Print linear regression model predictions for the next 'n' days\n",
        "lr_prediction = lr.predict(x_forecast)\n",
        "print(lr_prediction)\n",
        "\n",
        "# Print support vector regressor model predictions for the next 'n' days\n",
        "svm_prediction = svr_rbf.predict(x_forecast)\n",
        "print(svm_prediction)"
      ],
      "execution_count": 26,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[1494.09445102 1532.76646354 1544.10136377 1530.26876377 1551.07587287\n",
            " 1566.4959939  1568.44335304 1584.5725668  1607.80329135 1597.25156817\n",
            " 1597.74899143 1577.64039159 1584.83715364 1609.56014796 1624.40876142\n",
            " 1632.19819799 1639.45846087 1668.06559001 1688.70336352 1677.89763698\n",
            " 1680.88217653 1671.69572145 1660.43490554 1632.12411367 1676.13019689\n",
            " 1671.20888167 1631.24568536 1579.87350452 1643.69185031 1581.45044209]\n",
            "[1048.26903008  660.32920232  659.32078508  687.16846237  659.32078508\n",
            "  659.32078508  659.32078508  659.32078508  659.32078508  659.32078508\n",
            "  659.32078508  659.32078508  659.32078508  659.32078508  659.32078508\n",
            "  659.32078508  659.32078508  659.32078508  659.32078508  659.32078508\n",
            "  659.32078508  659.32078508  659.32078508  659.32078508  659.32078508\n",
            "  659.32078508  659.32078508  659.32078508  659.32078508  659.32078508]\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}
